import pandas as pd
import math
from datetime import datetime
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split
from bots.botlibs.labeling_lib import *
from bots.botlibs.tester_lib import test_model
from bots.botlibs.export_lib import export_model_to_ONNX
from bots.botlibs.fixing_lib import *
from mapie.classification import CrossConformalClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

def get_prices() -> pd.DataFrame:
    """Load close prices for the configured symbol.

    Reads the whitespace-separated file ``files/<symbol>.csv``, where the
    symbol comes from ``hyper_params['symbol']``. The ``<DATE>`` and
    ``<TIME>`` columns are joined into one timestamp that becomes the index
    (named ``time``); ``<CLOSE>`` becomes the single ``close`` column.

    Returns:
        DataFrame indexed by ``time`` with one ``close`` column, with rows
        containing NaN removed.
    """
    # Raw string: '\s' in a plain literal is an invalid escape sequence and
    # triggers a SyntaxWarning on recent Python versions.
    raw = pd.read_csv('files/' + hyper_params['symbol'] + '.csv', sep=r'\s+')

    # format='mixed' lets pandas infer the format of each timestamp string.
    timestamps = pd.to_datetime(raw['<DATE>'] + ' ' + raw['<TIME>'], format='mixed')

    prices = pd.DataFrame({'time': timestamps, 'close': raw['<CLOSE>']})
    prices = prices.set_index('time')
    return prices.dropna()

def get_features(data: pd.DataFrame) -> pd.DataFrame:
    """Add one rolling-mean column per period in ``hyper_params['periods']``.

    The new columns are named ``'0'``, ``'1'``, ... in the order the periods
    are listed. The input frame is not modified.

    Args:
        data: Price frame. The rolling mean of the whole frame is assigned to
            a single column, so presumably it holds just one column
            (``close``) - verify against callers.

    Returns:
        A copy of ``data`` with the rolling-mean columns appended and every
        row containing NaN (the warm-up rows of the longest window) dropped.
    """
    features = data.copy()
    for position, window in enumerate(hyper_params['periods']):
        # Roll over the untouched input so earlier feature columns never
        # leak into later windows.
        features[str(position)] = data.rolling(window).mean()
    return features.dropna()

def meta_learners_mapie(n_estimators_rf: int,
                       max_depth_rf: int,
                       confidence_level: float = 0.9,
                       CV_folds = 15):
    """Label the price history and derive two meta-label columns with MAPIE.

    The labelled dataset is restricted to rows strictly between
    ``hyper_params['backward']`` and ``hyper_params['forward']``. A random
    forest wrapped in ``CrossConformalClassifier`` is fitted on it and then
    asked for prediction sets on the same rows.

    Args:
        n_estimators_rf: ``n_estimators`` of the random forest.
        max_depth_rf: ``max_depth`` of the random forest.
        confidence_level: Confidence level passed to the conformal classifier.
        CV_folds: ``cv`` argument passed to the conformal classifier.

    Returns:
        The filtered dataset with two extra columns appended in this order:
        ``conformal_labels`` (1.0 where the prediction set holds exactly one
        class, else 0.0) and ``meta_labels`` (1.0 where the point prediction
        equals ``labels``, else 0.0).
    """
    labelled = get_labels(get_features(get_prices()),
                          min=5, max=15, markup=hyper_params['markup'])
    in_window = ((labelled.index < hyper_params['forward'])
                 & (labelled.index > hyper_params['backward']))
    data = labelled[in_window].copy()

    # Features are selected by position: everything except the first column
    # and the last two.
    # NOTE(review): if get_labels appends only 'labels', this slice also
    # drops the last feature column - confirm that is intended.
    X = data[list(data.columns[1:-2])]
    y = data['labels']

    base_forest = RandomForestClassifier(n_estimators=n_estimators_rf,
                                         max_depth=max_depth_rf)
    conformal = CrossConformalClassifier(estimator=base_forest,
                                         confidence_level=confidence_level,
                                         cv=CV_folds)
    mapie_classifier = conformal.fit_conformalize(X, y)

    predicted, prediction_sets = mapie_classifier.predict_set(X)
    # Drop the trailing axis so each row is one membership flag per class.
    prediction_sets = np.squeeze(prediction_sets, axis=-1)
    set_sizes = prediction_sets.sum(axis=1)

    # A row counts as "good" only when its prediction set has exactly one class.
    data['conformal_labels'] = np.where(set_sizes == 1, 1.0, 0.0)

    print(f"Empty sets (meta_labels=0): {np.count_nonzero(set_sizes == 0)}")
    print(f"Single element sets (meta_labels=1): {np.count_nonzero(set_sizes == 1)}")
    print(f"Multi-element sets (meta_labels=0): {np.count_nonzero(set_sizes >= 2)}")

    # Flag the rows whose point prediction agrees with the original label.
    data['meta_labels'] = np.where(predicted == data['labels'], 1.0, 0.0)

    correct_predictions = int((data['meta_labels'] == 1.0).sum())
    incorrect_predictions = len(data) - correct_predictions
    print(f"Correct predictions (meta_labels=1): {correct_predictions}")
    print(f"Incorrect predictions (meta_labels=0): {incorrect_predictions}")

    return data

def fit_final_models(dataset) -> list:
    """Train the main and meta CatBoost classifiers and score the pair.

    Columns are addressed by position: features are ``columns[1:-3]`` and the
    main target is ``columns[-3]``. The main model is trained only on rows
    where ``meta_labels == 1``; the meta model is trained on all rows with
    ``conformal_labels`` as its target. Each model gets its own shuffled
    80/20 split, and the 20% part is used as the early-stopping eval set.

    Args:
        dataset: Frame holding the feature columns plus ``conformal_labels``
            and ``meta_labels`` - presumably the output of
            ``meta_learners_mapie``; verify against callers.

    Returns:
        ``[R2, model, meta_model]`` where ``R2`` is the value returned by
        ``test_model`` (replaced by -1.0 when it is NaN).
    """
    feature_cols = dataset.columns[1:-3]
    label_col = dataset.columns[-3]

    def make_classifier():
        # Both models share the same hyper-parameters; build a fresh
        # instance each time.
        return CatBoostClassifier(iterations=500,
                                  custom_loss=['Accuracy'],
                                  eval_metric='Accuracy',
                                  verbose=False,
                                  use_best_model=True,
                                  task_type='CPU')

    # Main model: only the rows whose label the conformal step reproduced.
    trusted = dataset[dataset['meta_labels'] == 1]
    X = trusted[feature_cols]
    y = trusted[label_col].astype('int16')

    # Meta model: every row, predicting the conformal "single class" flag.
    X_meta = dataset[feature_cols]
    y_meta = dataset['conformal_labels'].astype('int16')

    # Keep the order of the two splits: both draw from the global RNG.
    train_X, test_X, train_y, test_y = train_test_split(
        X, y, train_size=0.8, test_size=0.2, shuffle=True)
    train_X_m, test_X_m, train_y_m, test_y_m = train_test_split(
        X_meta, y_meta, train_size=0.8, test_size=0.2, shuffle=True)

    model = make_classifier()
    model.fit(train_X, train_y,
              eval_set=(test_X, test_y),
              early_stopping_rounds=15,
              plot=False)

    meta_model = make_classifier()
    meta_model.fit(train_X_m, train_y_m,
                   eval_set=(test_X_m, test_y_m),
                   early_stopping_rounds=15,
                   plot=False)

    # Score the pair on features rebuilt from the price file.
    data = get_features(get_prices())
    R2 = test_model(data,
                    [model, meta_model],
                    hyper_params['stop_loss'],
                    hyper_params['take_profit'],
                    hyper_params['forward'],
                    hyper_params['backward'],
                    hyper_params['markup'],
                    plt=False)

    # NaN cannot be ordered when callers sort by score, so pin it to -1.0.
    if math.isnan(R2):
        R2 = -1.0
        print('R2 is fixed to -1.0')
    print('R2: ' + str(R2))
    return [R2, model, meta_model]


# Shared configuration read by the functions in this file.
hyper_params = {
    # Name of the price file under files/ (without the .csv extension).
    'symbol': 'EURUSD_H1',
    # Directory passed to export_model_to_ONNX.
    'export_path': '/Users/dmitrievsky/Library/Containers/com.isaacmarovitz.Whisky/Bottles/54CFA88F-36A3-47F7-915A-D09B24E89192/drive_c/Program Files/MetaTrader 5/MQL5/Include/Trend following/',
    'model_number': 0,
    # Passed to get_labels and test_model.
    'markup': 0.0001,
    'stop_loss': 0.005,
    'take_profit': 0.002,
    # Rolling-mean window lengths used by get_features: 5, 35, ..., 275.
    'periods': list(range(5, 300, 30)),
    # Training rows are kept strictly between these two timestamps.
    'backward': datetime(2020, 1, 1),
    'forward': datetime(2025, 1, 1),
}


# Train ten candidate pairs; each entry is [R2, model, meta_model].
models = []
for run in range(10):
    print('Learn ' + str(run) + ' model')
    labelled = meta_learners_mapie(15, 5, confidence_level=0.90, CV_folds=5)
    models.append(fit_final_models(labelled))

# Ascending sort by R2 leaves the highest-scoring entry at the end.
models.sort(key=lambda entry: entry[0])
best = models[-1]

# Re-run the tester on the best pair, this time with plt=True.
data = get_features(get_prices())
test_model(data,
           best[1:],
           hyper_params['stop_loss'],
           hyper_params['take_profit'],
           hyper_params['forward'],
           hyper_params['backward'],
           hyper_params['markup'],
           plt=True)

# The value is discarded when this runs as a script; it is only displayed
# in an interactive session.
best[2].get_best_score()['validation']

# The whole [R2, model, meta_model] entry is handed to the exporter.
export_model_to_ONNX(model=best,
                     symbol=hyper_params['symbol'],
                     periods=hyper_params['periods'],
                     periods_meta=hyper_params['periods'],
                     model_number=hyper_params['model_number'],
                     export_path=hyper_params['export_path'])

